***********************************
** The Effects of House Prices and Home Equity Extraction on Career Outcomes
** by Jacelly Cespedes, Zack Liu, and Carlos Parra
***********************************

// Directories
global data C:\Users\Dropbox\Data



***********************************
** data
***********************************

// Merge main datasets
cd "$data"
use "sample_people_projects", clear // individuals' information

sort tconst, stable
merge tconst using "out_projects_active_people_all_complete" // projects' information
tab _m
drop if _m==2
drop _merge

sort nconst, stable
merge nconst using "leading_role_mean_2008" // leading role
tab _m
drop if _m==2
drop _merge

sort nconst, stable
merge nconst using "leading_role_mean_2006" // leading role pre-treatment
tab _m
drop if _m==2
drop _merge

sort tconst, stable
merge tconst using "project_duration_all" // filming duration
tab _m
drop if _m==2
drop _merge

sort nconst, stable
merge nconst using "mean_producer_credits_all" // producer credits short-term
tab _m
drop if _m==2
drop _merge

sort nconst, stable
merge nconst using "mean_producer_credits_all_2013" // producer credits long-term
tab _m
drop if _m==2
drop _merge

sort nconst, stable
merge nconst using "starmeter_all" // starmeter
tab _m
drop _m

sort nconst, stable
merge nconst using "num_awards_pretreatment" // awards pre-treatment
tab _m
drop if _m==2
drop _m

sort nconst, stable
merge nconst using "high_profile_project_pre" // high profile project pre-treatment
tab _m
drop _m

sort nconst, stable
merge nconst using "awards_data" // award variables  
tab _m
drop _m

sort tconst, stable
merge tconst using "high_profile_projects"  // high-profile project
tab _m
drop _m

sort tconst, stable
merge tconst using "crew_size_data"  // crew size 
tab _m
drop _m

sort nconst, stable
merge nconst using "liens_data"  // // liens
tab _m
drop _m

sort nconst, stable
merge nconst using "movie_ratings"  // // movie rating
tab _m
drop _m


// Filming duration
bysort tconst: egen filming_dur_n=max(filming_duration)  

// Extensive Margin: binary variable for working or not by year
sort nconst startyear, stable
bys nconst startyear: egen num_projects_y=count(tconst)
label var num_projects_y "total number of projects per individual per year"

sort nconst startyear, stable
bys nconst startyear: egen num_big_projects_y0=count(tconst) if titletype=="movie" 
bys nconst startyear: egen num_big_projects_y=max(num_big_projects_y0)
replace num_big_projects_y=0 if num_big_projects_y==.

sort nconst startyear, stable
bys nconst startyear: gen active_year=(num_projects_y>=1)
label var active_year "indicator variable equal to one if the individual had at least one project in year t"

// Intensive margin: number of projects per year
sort nconst startyear titletype, stable
bys nconst startyear: egen num_movie_y=count(tconst) if titletype=="movie"
label var num_movie_y "number of movies in year t"
tab num_movie_y

// Size of the project
sort nconst startyear, stable
bysort nconst: egen crew_size_mean_2=mean(num_crew) if startyear >= 2008 & startyear <= 2012
bysort nconst: egen crew_size_mean=max(crew_size_mean_2) if startyear >= 2008 & startyear <= 2012

gen ln_crew_size_mean=ln(1+crew_size_mean) if crew_size_mean!=.

// Intensive marging by project size
sort nconst startyear titletype, stable
bys nconst startyear: egen num_movie_big_yq4=count(tconst) if titletype=="movie" & crew_size_q4==1
summ num_movie_big_yq4, det
label var num_movie_big_yq4 "number of big movies q4 in year t"

sort nconst startyear titletype, stable
bys nconst startyear: egen num_movie_small_yq1=count(tconst) if titletype=="movie" & crew_size_q1==1
summ num_movie_small_yq1, det
label var num_movie_small_yq1 "number of small movies q1 in year t"

// Collapsing dataset by year and individual
collapse (max) won_nom_award_pre num_hp_movie_y4 num_hp_movie_y3 high_profile_project_other movie_rating_pre leading_role_mean_pre num_awards_pre_2007 num_hp_movie_y num_movie_y active_year (max) movie_rating won_nom_award ln_crew_size_mean producer_mean producer_mean_2_13 leading_role_mean, by(startyear nconst)

sa "Temp/data_inter_2", replace

// Merge the collapsed database per year and individual with other datasets
use "Temp/data_inter_2", clear

rename startyear year

save "Temp/temp1", replace

merge 1:1 nconst year using "Temp/zillow_house_wealth" // Zillow data homeowners
keep if _merge == 3 | _merge == 1
drop _merge

merge 1:1 nconst year using "Temp/zillow_house_wealth_renters" // Zillow data renters
keep if _merge == 3 | _merge == 1
drop _merge

replace renter = 0 if renter == . 
replace first = first_renter if renter == 1
replace zip = zip_renter if renter == 1
replace total_house_wealth = total_renter if renter == 1

drop if total_house_wealth == . 

// Number of movies 2013 to 2018
preserve 
keep if year >= 2013 & year <= 2018
collapse (sum) total_movie_1318 = num_movie_y total_hp_movie_1318 = num_hp_movie_y , by(nconst)
keep nconst total_movie_1318 total_hp_movie_1318
save "Temp/movie_1318", replace
restore

// Active individuals
keep if year >= 2002 & year <= 2012

preserve 
keep if year >= 2003 & year <= 2006
collapse (max) num_movie_y (sum) movie_0306 = num_movie_y, by(nconst)
drop if num_movie_y == .  
gen active = 1
save "Temp/active_temp", replace
restore

mmerge nconst using "Temp/active_temp"

// Pre-treatment controls
global period year >= 2003 & year <= 2006

bysort nconst: egen total_movie_bq4_pre_ = total(num_movie_big_yq4) if $period
bysort nconst: egen total_movie_bq4_pre = max(total_movie_bq4_pre_)
drop total_movie_bq4_pre_

bysort nconst: egen total_movie_sq1_pre_ = total(num_movie_small_yq1) if $period
bysort nconst: egen total_movie_sq1_pre = max(total_movie_sq1_pre_)
drop total_movie_sq1_pre_

bysort nconst: egen total_movie_pre_ = total(num_movie_y) if $period
bysort nconst: egen total_movie_pre = max(total_movie_pre_ )
drop total_movie_pre_

gen total_hp_movie_pre=num_hp_movie_y_pre
gen hp_movie_dum_pre = 1 if total_hp_movie_pre >0
replace hp_movie_dum_pre = 0 if hp_movie_dum_pre == . 

// Crossection 2008 - 2012
keep if year >= 2008 & year<=2012

replace num_movie_y = 0 if num_movie_y == . 
bysort nconst: egen total_movie = total(num_movie_y)

*** Intensive: hp movies
bysort nconst: egen total_hp_movie = total(num_hp_movie_y)

*** Intensive margin: small and big
bysort nconst: egen total_movie_bq4 = total(num_movie_big_yq4)
bysort nconst: egen total_movie_sq1 = total(num_movie_small_yq1)

*** Since all years have the same numbers, keep only one year (i.e., crossection)
keep if year == 2012

// Merge with other datasets
*** Merge with fips
mmerge zip using "census/zip_fips_v2"
keep if _merge == 3
drop _merge 

destring fipscounty, replace force

*** Merge with individual covariates
mmerge nconst using "covariates_movie_data_ln"
keep if _merge == 3
drop _merge

egen group = group(nconst)
xtset fipscounty group

*** Extensive margin: general
gen movie_dum = 1 if total_movie >0
replace movie_dum = 0 if movie_dum == . 

*** Extensive margin: small and big
bysort nconst: gen dum_total_movie_bq4 = (total_movie_bq4>0) if total_movie_bq4!=.
bysort nconst: gen dum_total_movie_sq1 = (total_movie_sq1)>0 if total_movie_sq1!=.

*** Extensive margin: hp movies
gen hp_movie_dum = 1 if total_hp_movie >0
replace hp_movie_dum = 0 if hp_movie_dum == . 

*** Occupation FE
tab occupation, gen(occ)

*** Keep most active individuals
keep if movie_0306 > 0 

*** Drop singleton county observation
bysort fipscounty: egen county_count = count(fipscounty)
drop if county_count == 1

*** Expensive house indicator
xtile quint_2 = HP_2007 if  movie_0306 > 0 & renter == 0, nq(5)
gen quint_52 = (quint_2==5) if quint_2 != .

*** Merge with movie count between 2008 - 2012
mmerge nconst using movies_pre
tab _m
drop _m

***  Merge main dataset with home equity
sort nconst, stable
merge 1:1 nconst using "$data\home_equity_data"
drop _m

sa "data_for_reg_v3", replace
